#!/usr/bin/env python
# encoding: utf-8
import threading

from ThreadProcessor import ThreadPool
from request import Request
import types
from logger import logger

from request_downloader import RequestDownloader
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from queue import Queue, Empty
import time


class Spider(object):
    """Pull-based web spider driven by a user-supplied ``process`` object.

    The ``process`` object is expected to provide ``start_url`` (iterable of
    URLs), ``headers``, a ``process(response)`` callback, and may optionally
    define ``supportJs`` / ``timeout`` / ``useProxy`` attributes.

    threadoptions:
        queueTimeOut: when no item can be taken from the queue for this many
                      seconds, assume the crawl is done and stop the worker
                      loop. ``None`` (the default) blocks forever.
        threadCount: how many threads the pool creates, default 10.
        multiplethread: whether to use multi-thread mode, default False; when
                        False, queueTimeOut and threadCount are ignored.
    """

    def __init__(self, process=None, threadoptions: dict = None):
        super().__init__()
        # BUG FIX: the original signature used a mutable default ({}), which
        # is created once and shared by every Spider instance.
        if threadoptions is None:
            threadoptions = {}
        self.process = process
        self.__pipeline_list = []
        # getattr-with-default replaces the original hasattr/ternary dance.
        self.__supportJs = getattr(process, "supportJs", False)
        self.__timeout = getattr(process, "timeout", 60)
        self.__useProxy = getattr(process, "useProxy", False)
        self.__queue = Queue()
        # when queueTimeOut sec can not get item from queue, assume that
        # process is done and stop the worker loop.
        self.__queueTimeOut = threadoptions.get("queueTimeOut", None)
        self.__threadCount = threadoptions.get("threadCount", 10)
        self.__multiple_thread = threadoptions.get("multiplethread", False)
        self.__processStop = False

    def __start_in_single_mode(self):
        """Crawl every start URL sequentially on the calling thread."""
        for url in self.process.start_url:
            self.__doRequest(url=url)

    def start(self):
        """Entry point: run the crawl in single- or multi-thread mode."""
        start = time.time()
        if self.__multiple_thread:
            logger.info("begin to scrap in multiple thread mode")
            self.__start_in_async_mode()
        else:
            logger.info("begin to scrap in single thread mode")
            self.__start_in_single_mode()
        print("process time:", str((time.time() - start)))

    def __start_in_async_mode(self):
        """Seed the queue from the start URLs, then drain it with a pool."""
        self.__doRequestAsync()
        self.__doQueue()

    def addPipeline(self, pipeline):
        """Register an item pipeline; returns ``self`` for chaining.

        Falsy pipelines (e.g. ``None``) are silently ignored.
        """
        if pipeline:
            self.__pipeline_list.append(pipeline)
        return self

    def __processPipeline(self, item):
        """Push one scraped item through every registered pipeline."""
        for pipeline in self.__pipeline_list:
            pipeline.process_item(item)

    def __processDownerloader(self, downloader):
        """Execute one download, run its callback, and enqueue the results.

        Requests yielded by the callback are wrapped in new downloaders;
        anything else is enqueued as a scraped item for the pipelines.
        """
        response = downloader.process_request()
        try:
            result = downloader.request.callback(response)
        except Exception as e:
            # BUG FIX: the original fell through after the exception and hit
            # isinstance(callback, ...) with the name unbound -> NameError
            # that masked the real error. Log, flag the stop, and bail out.
            self.__processStop = True
            logger.error("callback failed: %s", e)
            return
        if isinstance(result, types.GeneratorType):
            for item in result:
                if isinstance(item, Request):
                    self.__queue.put(RequestDownloader(request=item))
                else:
                    self.__queue.put(item)

    def __doQueue(self):
        """Worker loop: dispatch queue items until the queue stays empty for
        ``queueTimeOut`` seconds or a callback error sets the stop flag.

        NOTE(review): if queueTimeOut is shorter than a typical download, the
        queue can look empty while workers are still producing items and the
        loop stops early — confirm the configured timeout covers that.
        """
        with ThreadPoolExecutor(self.__threadCount) as executor:
            while not self.__processStop:
                try:
                    q = self.__queue.get(timeout=self.__queueTimeOut)
                except Empty:
                    self.__processStop = True
                else:
                    if isinstance(q, RequestDownloader):
                        executor.submit(self.__processDownerloader, q)
                    else:
                        executor.submit(self.__processPipeline, q)

    def __doRequestAsync(self):
        """Download every start URL concurrently.

        The ``with`` block waits for all submitted downloads, so the queue is
        fully seeded with first-level results before __doQueue begins.
        """
        with ThreadPoolExecutor(self.__threadCount) as executor:
            for url in self.process.start_url:
                request = Request(url=url, headers=self.process.headers,
                                  supportJs=self.__supportJs,
                                  timeout=self.__timeout,
                                  useProxy=self.__useProxy)
                request.callback = self.process.process
                executor.submit(self.__processDownerloader,
                                RequestDownloader(request=request))

    def __doRequest(self, url="", request=None):
        """Single-thread path: download one request and recursively follow any
        Requests its callback yields; other yielded items go to pipelines.
        """
        if not request:
            request = Request(url=url, headers=self.process.headers,
                              supportJs=self.__supportJs,
                              timeout=self.__timeout,
                              useProxy=self.__useProxy)
        downloader = RequestDownloader(request=request)
        response = downloader.process_request()
        if downloader.status_code != 200:
            # Original behavior kept: log the non-200 status and continue
            # anyway (the trailing 'pass' was dead code and is removed).
            logger.info("response from url is " + str(downloader.status_code))
        if not request.callback:
            request.callback = self.process.process
        result = request.callback(response)
        if isinstance(result, types.GeneratorType):
            for item in result:
                if isinstance(item, Request):
                    self.__doRequest(request=item)
                else:
                    self.__processPipeline(item)